*     *****************************************************************  * 
*     *****************************************************************  * 
*       File-Name:      getSpainDataset.do                               *  
*       Date:           30 May 2016                                      * 
*       Author:         Gschwend/Shukhova                                         * 
*       Purpose:      	get aggregated data for spain					 *
* 	    Input Files:    SURV_SPA_Catalonia_01_12_2013_SurveyData_cleaned.dta         * 
*						SURV_SPA_Madrid_SurveyData_cleaned.dta			            *
* 						SURV_SPA_NAT_Catalonia_12_19_2012_SurveyData_cleaned.dta    *
* 						SURV_SPA_NAT_Madrid_12_19_2012_SurveyData_cleaned.dta       * 
*						SURV_SPA_Catalonia_europe_09_22_2014_SurveyData_cleaned.dta *
*						SURV_SPA_NAT_Madrid_12_19_2012_SurveyData_cleaned.dta       *
*       Data Output:    aggregated_Spain.dta                            *              
*     ****************************************************************   * 
*     ****************************************************************   * 


* Briefwahl

version 14.2
clear
capture log close
set more off


* Thomas G.'s local (office) path:
* local path /Users/gschwend/Dropbox/Andre/OUP multilevel electoral behavior book/chapter 6/replication/


* Thomas G.'s local (Air) path:
* local path /Users/thomasgschwend/Dropbox/Andre/OUP multilevel electoral behavior book/chapter 6/replication/
 


*** Save variables for Catalonia at the regional level

use "SURV_SPA_Catalonia_01_12_2013_SurveyData_cleaned.dta", clear


gen eid = 301
gen COUNTRY = "Spain"
gen LEVEL = "Regional"
gen ELECID = "Catalonia"

gen district = SD2C


gen importance_regional = Q34A
gen importance_national = Q34B
gen importance_european = Q34C


gen Q17_party1 = Q17A  
gen Q17_party2 = Q17B
gen Q17_party3 = Q17C  
gen Q17_party4 = Q17D 
gen Q17_party5 = Q17E
gen Q17_party6 = Q17F  
gen Q17_party7 = Q17G 
gen Q17_party8 = Q17H          
gen Q17_party9 = Q17I



gen polknow1 = (Q10A==2)
gen polknow2 = (Q10B==4)
gen polknow3 = (Q10C==6)
gen polknow4 = (Q10D==8)
gen polknow5 = (Q10E==10)

alpha polknow1  polknow2 polknow3 polknow4 polknow5, detail item gen(polknow)

keep eid COUNTRY LEVEL ELECID district Q8A Q17_party* importance_* Q47A Q47B polknow AGE female
log using Parties_Spain.log, replace
*spainCatalonReg
fre Q8A Q17*
log off

compress
save "spainCatalonReg.dta", replace


*** Save variables for Madrid at the regional level

use "SURV_SPA_Madrid_SurveyData_cleaned.dta", clear 

gen eid = 302
gen COUNTRY = "Spain"
gen LEVEL = "Regional"
gen ELECID = "Madrid"

gen district = 5


gen importance_regional = Q34A
gen importance_national = Q34B
gen importance_european = Q34C

gen Q17_party1 = Q17A  
gen Q17_party2 = Q17B
gen Q17_party3 = Q17C  
gen Q17_party4 = Q17D 
gen Q17_party5 = Q17E
gen Q17_party6 = Q17F  

gen polknow1 = (Q10A==2)
gen polknow2 = (Q10B==4)
gen polknow3 = (Q10C==6)
gen polknow4 = (Q10D==8)
gen polknow5 = (Q10E==10)

alpha polknow1  polknow2 polknow3 polknow4 polknow5, detail item gen(polknow)

keep eid COUNTRY LEVEL ELECID district Q8A Q17_party* importance_* Q47A Q47B polknow age gend
log on
*spainMadridReg
fre Q8A Q17*
log off

rename age AGE

recode gend (2 = 1 ) (1 = 0) 
label define gend 1 "female" 0 "male", modify
rename gend female

compress
save "spainMadridReg.dta", replace






*** Save variables for Catalonia at the national level

use "SURV_SPA_NAT_Catalonia_12_19_2012_SurveyData_cleaned.dta", clear


gen eid = 311
gen COUNTRY = "Spain"
gen LEVEL = "National"
gen ELECID = "Catalonia"

gen district = SD2C


gen importance_regional = Q34A
gen importance_national = Q34B
gen importance_european = Q34C

gen Q17_party1 = Q17A  
gen Q17_party2 = Q17B
gen Q17_party3 = Q17C  
gen Q17_party4 = Q17D 
gen Q17_party5 = Q17E
gen Q17_party6 = Q17F  
gen Q17_party7 = Q17G

gen polknow1 = (Q10A==2)
gen polknow2 = (Q10B==4)
gen polknow3 = (Q10C==6)
gen polknow4 = (Q10D==8)
gen polknow5 = (Q10E==10)

alpha polknow1  polknow2 polknow3 polknow4 polknow5, detail item gen(polknow)

keep eid COUNTRY LEVEL ELECID district Q8A Q17_party* importance_* Q47A Q47B polknow AGE GEND
log on
* spainCatalonNat
fre Q8A Q17*
log off

recode GEND (2 = 1) (1 = 0)
label define GEND 1 "female" 0 "male", modify
rename GEND female

compress
save "spainCatalonNat.dta", replace







*** Save variables for Madrid at the national level


use "SURV_SPA_NAT_Madrid_12_19_2012_SurveyData_cleaned.dta", clear


gen eid = 312
gen COUNTRY = "Spain"
gen LEVEL = "National"
gen ELECID = "Madrid"

gen district=5

gen importance_regional = Q34A
gen importance_national = Q34B
gen importance_european = Q34C

gen Q17_party1 = Q17A  
gen Q17_party2 = Q17B
gen Q17_party3 = Q17C  
gen Q17_party4 = Q17D 



gen polknow1 = (Q10A==2)
gen polknow2 = (Q10B==4)
gen polknow3 = (Q10C==6)
gen polknow4 = (Q10D==8)

alpha polknow1  polknow2 polknow3 polknow4, detail item gen(polknow)


keep eid COUNTRY LEVEL ELECID district Q8A Q17_party* importance_* Q47A Q47B polknow AGE GEND
log on
* spainMadridNat
fre Q8A Q17*
log off

recode GEND (2 = 1) (1 = 0)
label define GEND 1 "female" 0 "male", modify
rename GEND female

compress
save "spainMadridNat.dta", replace





*** Save variables for Catalonia at the european level

use "SURV_SPA_Catalonia_europe_09_22_2014_SurveyData_cleaned.dta", clear

gen eid = 331
gen COUNTRY = "Spain"
gen LEVEL = "European"
gen ELECID = "Catalonia"

gen district = SD2C

gen importance_regional = Q34A
gen importance_national = Q34B
gen importance_european = Q34C

gen Q17_party1 = Q17A  
gen Q17_party2 = Q17B
gen Q17_party3 = Q17C  
gen Q17_party4 = Q17D 
gen Q17_party5 = Q17E
gen Q17_party6 = Q17F  
gen Q17_party7 = Q17G


gen polknow1 = (Q10A==3)
gen polknow2 = (Q10B==1)
gen polknow3 = (Q10C==11)
gen polknow4 = (Q10D==9)
gen polknow5 = (Q10E==7)
gen polknow6 = (Q10F==5)
gen polknow7 = (Q10G==13)

alpha polknow1  polknow2 polknow3 polknow4 polknow5 polknow6 polknow7, detail item gen(polknow)

keep eid COUNTRY LEVEL ELECID district Q8A Q17_party* importance_* Q47A Q47B polknow AGE GEND
log on
* spainCatalonEU
fre Q8A Q17*
log off

recode GEND (2 = 1) (1 = 0)
label define GEND 1 "female" 0 "male", modify
rename GEND female

compress
save "spainCatalonEU.dta", replace





*** Save variables for Madrid at the european level

use "SURV_SPA_Madrid_europe_09_22_2014_SurveyData_cleaned.dta", clear

gen eid = 332
gen COUNTRY = "Spain"
gen LEVEL = "European"
gen ELECID = "Madrid"

gen district=5


gen importance_regional = Q34A
gen importance_national = Q34B
gen importance_european = Q34C

gen Q17_party1 = Q17A  
gen Q17_party2 = Q17B
gen Q17_party3 = Q17C  
gen Q17_party4 = Q17D 
gen Q17_party5 = Q17E
gen Q17_party6 = Q17F  


gen polknow1 = (Q10A==3)
gen polknow2 = (Q10B==1)
gen polknow3 = (Q10C==7)
gen polknow4 = (Q10D==11)
gen polknow5 = (Q10E==5)
gen polknow6 = (Q10F==9)

alpha polknow1  polknow2 polknow3 polknow4 polknow5 polknow6, detail item gen(polknow)


keep eid COUNTRY LEVEL ELECID district Q8A Q17_party* importance_* Q47A Q47B polknow AGE GEND
log on
* spainMadridEU
fre Q8A Q17*
log close

recode GEND (2 = 1) (1 = 0)
label define GEND 1 "female" 0 "male", modify
rename GEND female

compress
save "spainMadridEU.dta", replace


* append datasets 
local datasets "spainCatalonReg spainCatalonNat spainCatalonEU spainMadridReg spainMadridNat"

foreach dataset in  `datasets' {
		append using `dataset'
		rm "`dataset'.dta"
	     		}


gen caseid = _n
sort caseid

* Check data consistency 0
sum caseid
return list
assert r(N)== 5800  & r(min)==1 & r(max)==5800 & r(sum)== 16822900



label define district 1 "Barcelona" 2 "Tarragona" 3 "Lleida" 4 "Girona" 5 "Madrid" 9 "DK"
label value district  district
fre district


************************************XXXXXXXXXX

order eid COUNTRY LEVEL ELECID district Q8A Q17_party* importance_* Q47A Q47B polknow AGE female, last

gen  pid = Q47A 
gen spid = Q47B
ren AGE age

drop Q47*

label define Q8A 1 "party 1" 2 "party 2" 3 "party 3" 4 "party 4" 5 "party 5" 6 "party 6" 7 "party 7" 8 "party 8" 9 "party 9", modify
fre Q8A
recode Q8A 80/max = .
fre Q8A


* Check data consistency 1
sum  Q8A
return list
assert r(N)== 3717  & r(min)==1 & r(max)==9 & r(sum)== 12498

sum caseid
return list
assert r(N)== 5800  & r(min)==1 & r(max)==5800 & r(sum)== 16822900



* Check wether coding is correct
bysort ELECID: tab eid LEVEL

*create id to merge in party information for some elections
gen id = eid*100 + Q8A
sort id caseid
*merge id using parties.dta


merge m:1 id using "parties_merge.dta"
tab _merge

/*
   Result                           # of obs.
    -----------------------------------------
    not matched                         2,174
        from master                     2,083  (_merge==1)
        from using                         91  (_merge==2)

    matched                             3,717  (_merge==3)
    -----------------------------------------
*/

keep if _merge==3

* Check data consistency 2
sum caseid
return list
assert r(N)== 3717  & r(min)==2 & r(max)==5800 & r(sum)== 11068815



drop _merge 

* Indicator for wasted vote 
gen seat_current0 = (seat_current==0)
gen seat_previous0 = (seat_previous==0)

* based-on district-level results
replace seat_current0=0 if eid==301 /*Catalonia Regional Election */
replace seat_current0=1 if eid==301 & partynumber==9 & district!=1
replace seat_current0=1 if eid==301 & partynumber==8 
replace seat_current0=1 if eid==301 & partynumber==7
replace seat_current0=1 if eid==301 & partynumber==6 & (district==4 | district==3)

replace seat_previous0=0 if eid==301 /*Catalonia Regional Election */
replace seat_previous0=1 if eid==301 & partynumber==9 
replace seat_previous0=1 if eid==301 & partynumber==8 
replace seat_previous0=1 if eid==301 & partynumber==7 & (district==2 | district==3)
replace seat_previous0=1 if eid==301 & partynumber==6 & (district!=1)
replace seat_previous0=1 if eid==301 & partynumber==3 & (district==3)


replace seat_current0=0 if eid==311 /*Catalonia National Election */
replace seat_current0=1 if eid==311 & partynumber==7
replace seat_current0=1 if eid==311 & partynumber==6 
replace seat_current0=1 if eid==311 & partynumber==5 & (district==2 | district==3)
replace seat_current0=1 if eid==311 & partynumber==3 & district!=1

replace seat_previous0=0 if eid==311 /*Catalonia National Election */
replace seat_previous0=1 if eid==311 & partynumber==7 
replace seat_previous0=1 if eid==311 & partynumber==6 
replace seat_previous0=1 if eid==311 & partynumber==5 & (district==2 | district==3)
replace seat_previous0=1 if eid==311 & partynumber==3 & (district==2 | district==4)
replace seat_previous0=1 if eid==311 & partynumber==2 & (district==4)

compress

* Check data consistency 3
sum caseid
return list
assert r(N)== 3717  & r(min)==2 & r(max)==5800 & r(sum)== 11068815
drop caseid

sum seat_current0
return list
assert r(N)== 3717  & r(min)==0 & r(max)==1 & r(sum)== 174

sum seat_previous0
return list
assert r(N)== 3717  & r(min)==0 & r(max)==1 & r(sum)== 560

save "aggregated_Spain.dta", replace





